@InProceedings{Otiniano-RodríguezCáma:2013:FiSpRe,
author = "Otiniano-Rodr{\'{\i}}guez, K. and C{\'a}mara-Ch{\'a}vez, G.",
affiliation = "{Federal University of Ouro Preto} and {Federal University of Ouro
Preto}",
title = "Finger Spelling Recognition from {RGB-D} Information using Kernel
Descriptor",
booktitle = "Proceedings...",
year = "2013",
editor = "Boyer, Kim and Hirata, Nina and Nedel, Luciana and Silva,
Claudio",
organization = "Conference on Graphics, Patterns and Images, 26. (SIBGRAPI)",
publisher = "IEEE Computer Society",
address = "Los Alamitos",
keywords = "sign language, finger spelling, support vector machine (SVM),
bag-of-visual-words.",
abstract = "Deaf people use systems of communication based on sign language
and finger spelling. Manual spelling, or finger spelling, is a
system where each letter of the alphabet is represented by an
unique and discrete movement of the hand. RGB and depth images can
be used to characterize hand shapes corresponding to letters of
the alphabet. The advantage of depth cameras over color cameras
for gesture recognition is more evident when performing hand
segmentation. In this paper, we propose a hybrid system approach
for finger spelling recognition using RGB-D information from
Kinect sensor. In a first stage, the hand area is segmented from
background using depth map and precise hand shape is extracted
using both depth data and color data from Kinect sensor. Motivated
by the performance of kernel based features, due to its simplicity
and the ability to turn any type of pixel attribute into
patch-level features, we decided to use the gradient kernel
descriptor for feature extraction from depth images. The
Scale-Invariant Feature Transform (SIFT) is used for describing
the content of the RGB image. Then, the Bag-of-Visual-Words
approach is used to extract semantic information. Finally, these
features are used as input of our Support Vector Machine (SVM)
classifier. The performance of this approach is quantitatively and
qualitatively evaluated on a dataset of real images of American
Sign Language (ASL) hand shapes. Three experiments were performed,
using a combination of RGB and depth information and also using
only RGB or depth information separately. The database used is
composed of 120,000 images. According to our experiments, our
approach has an accuracy rate of 91.26\% when RGB and depth
information is used, outperforming other state-of-the-art
methods.",
conference-location = "Arequipa, Peru",
conference-year = "5-8 Aug. 2013",
doi = "10.1109/SIBGRAPI.2013.10",
language = "en",
ibi = "8JMKD3MGPBW34M/3EEQR3H",
url = "http://urlib.net/ibi/8JMKD3MGPBW34M/3EEQR3H",
targetfile = "final paper.pdf",
urlaccessdate = "2024, Apr. 29"
}